# -*- coding: utf-8 -*-
"""
@Time    : 2024/6/20 14:14 
@Author  : ZhangShenao 
@File    : crawl_serialization.py 
@Desc    : 单线程爬虫
"""
import time

import requests


# Download the page at the given URL and report its size
def download_page(url, timeout=10):
    """Fetch a single page and print how many bytes were received.

    Args:
        url: The page URL to fetch.
        timeout: Seconds to wait for the server before giving up.
            Backward-compatible addition: without it, ``requests.get``
            blocks indefinitely on an unresponsive host and stalls the
            whole sequential crawl.

    Raises:
        requests.RequestException: On connection errors or timeout.
    """
    response = requests.get(url, timeout=timeout)
    print(f'download {len(response.content)} from {url}')


# Download data from every page, one request at a time
def download_all_page(urls):
    """Crawl each URL in *urls* sequentially via :func:`download_page`."""
    for site in urls:
        download_page(site)


if __name__ == '__main__':
    # Target pages: a mix of Wikipedia portals and programming-language articles.
    sites = [
        'https://en.wikipedia.org/wiki/Portal:Arts',
        'https://en.wikipedia.org/wiki/Portal:History',
        'https://en.wikipedia.org/wiki/Portal:Society',
        'https://en.wikipedia.org/wiki/Portal:Biography',
        'https://en.wikipedia.org/wiki/Portal:Mathematics',
        'https://en.wikipedia.org/wiki/Portal:Technology',
        'https://en.wikipedia.org/wiki/Portal:Geography',
        'https://en.wikipedia.org/wiki/Portal:Science',
        'https://en.wikipedia.org/wiki/Computer_science',
        'https://en.wikipedia.org/wiki/Python_(programming_language)',
        'https://en.wikipedia.org/wiki/Java_(programming_language)',
        'https://en.wikipedia.org/wiki/PHP',
        'https://en.wikipedia.org/wiki/Node.js',
        'https://en.wikipedia.org/wiki/The_C_Programming_Language',
        'https://en.wikipedia.org/wiki/Go_(programming_language)',
    ]

    # Time the full sequential crawl with a monotonic high-resolution clock.
    started = time.perf_counter()
    download_all_page(sites)
    elapsed = time.perf_counter() - started
    print(f'Crawl {len(sites)} pages in {elapsed} seconds')
